In [1]:
#%pip install ydata_profiling
#pip install --upgrade pandas_profiling
#pip install numpy==1.19.3
#pip install pandas
#pip show pandas
#pip install openpyxl
In [2]:
#pip install numpy
In [3]:
#pip install ipywidgets
In [4]:
import pandas as pd
from ydata_profiling import ProfileReport
from pathlib import Path
from django.db.models import Max
# from pydantic_settings import BaseSettings
#from pandas_profiling import ProfileReport

# Your other code here
In [5]:
# Set DJANGO_ALLOW_ASYNC_UNSAFE before any ORM call is made from this notebook.
# NOTE(review): per Django's async-safety documentation this flag lifts the guard
# that blocks synchronous ORM access when an event loop is running (as in a
# Jupyter kernel); it is meant for interactive use only — confirm it is never
# set in the deployed application.
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
In [6]:
from DataTune.models import UploadedData
In [7]:
# Fetch the most recent upload record, ordered by its `uploaded_at` field.
# NOTE(review): Django's `latest()` raises `DoesNotExist` when the table has no
# rows — confirm at least one upload exists before running this cell.
latest_uploaded_data = UploadedData.objects.latest('uploaded_at')
In [8]:
# Load the most recent upload into `df`, picking the pandas reader from the
# file's extension.
#
# Produces:
#     file_path: filesystem path (str) of the uploaded file.
#     df:        DataFrame parsed from that file.
# Raises:
#     ValueError: if the extension is not .csv, .xls or .xlsx.
file_path = latest_uploaded_data.file.path

# Number of leading lines skipped in CSV uploads before the header row.
# NOTE(review): value carried over unchanged from the original read_csv call;
# confirm it suits every CSV that can be uploaded.
CSV_SKIPROWS = 4

# Compare on the real, lower-cased suffix: "DATA.CSV" is accepted, and a name
# that merely ends in "csv" without a dot is rejected instead of being handed
# to the Excel reader.
extension = Path(file_path).suffix.lower()

if extension == '.csv':
    df = pd.read_csv(file_path, delimiter=',', encoding='utf-8', skiprows=CSV_SKIPROWS)
elif extension == '.xlsx':
    df = pd.read_excel(file_path, engine='openpyxl')
elif extension == '.xls':
    # openpyxl cannot read legacy .xls workbooks; leave the engine unset so
    # pandas selects the reader it supports for that format.
    df = pd.read_excel(file_path)
else:
    # Unsupported extension: fail loudly rather than guess a parser.
    raise ValueError("Unsupported file format. Only CSV, XLS, and XLSX are supported.")
In [9]:
# Preview the first rows to check that the header and columns parsed as expected.
df.head()
Out[9]:
WineClass Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue OD280/OD315 of diluted wines Proline
0 3 12.85 3.27 2.58 22.0 106.0 1.65 0.60 0.60 0.96 5.58 0.87 2.11 570.0
1 2 12.64 1.36 2.02 16.8 100.0 2.02 1.41 0.53 0.62 5.75 0.98 1.59 450.0
2 2 12.08 1.13 2.51 24.0 78.0 2.00 1.58 0.40 1.40 2.20 1.31 2.72 630.0
3 2 12.00 1.51 2.42 22.0 86.0 1.45 1.25 0.50 1.63 3.60 1.05 2.65 450.0
4 1 13.64 3.10 2.56 15.2 116.0 2.70 3.03 0.17 1.66 5.10 0.96 3.36 845.0
In [10]:
# pip install pandas-profiling
In [11]:
# Build the profiling report for the loaded frame with explorative mode enabled.
profile = ProfileReport(
    df,
    title='Pandas Profiling EDA Report',
    explorative=True,
)
In [12]:
# Display the generated report inline in the notebook.
profile.to_notebook_iframe()
In [13]:
# Derive the report's name from the uploaded file's base name.
# The stored `file.name` can include a sub-directory, and either the directory
# or the file name may contain dots. Take the final path component first and
# strip only the last extension; splitting on the first "." would reduce
# "dir.v1/wine.2024.csv" to "dir".
original_filename = Path(latest_uploaded_data.file.name).name

# Base name without its final extension, e.g. "wine.2024.csv" -> "wine.2024".
filename_without_extension = Path(original_filename).stem
In [14]:
# Write the report to ../media/EDA_report/, named after the uploaded file.
report_name = f"pandas_profiling_report_{filename_without_extension}.html"
file_path = Path("..", "media", "EDA_report", report_name)

# Create the target folder (and any missing parents) if it is not there yet.
file_path.parent.mkdir(parents=True, exist_ok=True)

# Export the profile as a standalone HTML file.
profile.to_file(file_path)
In [15]:
# %%capture captured_output
# %%HTML
# <h1>My EDA Report</h1>
# <p>This is some content of your EDA report.
# profile.to_notebook_iframe()
# </p>
# with open('C:\Users\Sristi\DataTune_app\DataTune_app\media\EDA_report\eda_report.html', 'w') as file:
#     file.write(captured_output.stdout)
In [16]:
pip install lazypredict
Requirement already satisfied: lazypredict in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (0.2.12)
Requirement already satisfied: click in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lazypredict) (8.1.7)
Requirement already satisfied: scikit-learn in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lazypredict) (1.4.0)
Requirement already satisfied: pandas in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lazypredict) (2.2.0)
Requirement already satisfied: tqdm in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lazypredict) (4.66.1)
Requirement already satisfied: joblib in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lazypredict) (1.3.2)
Requirement already satisfied: lightgbm in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lazypredict) (4.3.0)
Requirement already satisfied: xgboost in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lazypredict) (2.0.3)
Requirement already satisfied: colorama in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from click->lazypredict) (0.4.6)
Requirement already satisfied: numpy in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lightgbm->lazypredict) (1.25.2)
Requirement already satisfied: scipy in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from lightgbm->lazypredict) (1.11.4)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from pandas->lazypredict) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from pandas->lazypredict) (2024.1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from pandas->lazypredict) (2023.4)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from scikit-learn->lazypredict) (3.2.0)
Requirement already satisfied: six>=1.5 in c:\users\sristi\datatune_app\datatune_app\.venv\lib\site-packages (from python-dateutil>=2.8.2->pandas->lazypredict) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
In [17]:
import lazypredict
In [ ]: